#################### R CODES FOR MLE REPLICATION PROJECT ######################
#################### MOOHYUNG CHO #############################################

############################## 0. Setup ########################################

# Set up workspace
rm(list=ls()) 
setwd("/Users/moohyungcho/Desktop/Spring 2015/MLE/Replication")
set.seed(6886)

# Function to load packages
loadPkg=function(toLoad){
  for(lib in toLoad){
    if(! lib %in% installed.packages()[,1])
    { install.packages(lib, repos='http://cran.rstudio.com/') }
    suppressMessages( library(lib, character.only=TRUE) ) }
}

# Load libraries
packs=c('foreign', 'MASS', 'ggplot2', 'reshape2', 'arm', 'grid', 'gridExtra', 
        'separationplot', 'lmtest', 'plm', 'car', 'sandwich')
loadPkg(packs)

# Load data
load("/Users/moohyungcho/Desktop/Spring 2015/MLE/Replication/TwoCourts.RData")

######

################################ 1. Running the models #####################################

# subsetting the dataset
halljr = x
halljr = halljr[halljr$statchal==1,]

## I'm running Models 3 and 4 in Table 2 in Hall's Study

### model 3: vertical cases
halljr_ver = halljr[halljr$lat==0,]
formjr3 = formula(strike ~ moodstrike + constraintp +  FloorMedianOverModel + JCSPredProb + ConstraintDist + 
                    PresCourtDist + HJchairDist + SJchairDist + bills + sgsupp + amisupp + amiopp)
modeljr3 = glm(formjr3, data=halljr_ver, family="binomial")
summary(modeljr3)
coeftest(modeljr3, vcov=vcovHC(modeljr3, type='HC')) # to get robust standard error

### model 4: lateral cases
halljr_lat = halljr[halljr$lat==1,]
modeljr4 = glm(formjr3, data=halljr_lat, family="binomial")
summary(modeljr4)
coeftest(modeljr4, vcov=vcovHC(modeljr4, type='HC')) # to get robust standard error

################################# 2. Coefficient Plot ########################################

#### (1) Coefficient plot for lateral cases

coef_lat = data.frame(modeljr4$coefficients)
names(coef_lat)="coef"
cov.modeljr <- vcovHC(modeljr4, type = "HC")
rob.std.err <- sqrt(diag(cov.modeljr))
sd_lat = data.frame(rob.std.err)
names(sd_lat)="sd"

upper95 = data.frame(coef_lat + qnorm(0.975)*sd_lat)
names(upper95)="upper95"
lower95 = data.frame(coef_lat - qnorm(0.975)*sd_lat)
names(lower95)="lower95"
upper90 = data.frame(coef_lat + qnorm(0.95)*sd_lat)
names(upper90)="upper90"
lower90 = data.frame(coef_lat - qnorm(0.95)*sd_lat)
names(lower90)="lower90"
coefData = cbind(coef_lat, sd_lat, upper95, lower95) 
varname = c('(Intercept)', 'Strike Mood', 'Institutional \n Constraint', 'Predicted Support \n by the Pivotal Legislator',
            'Predicted Support \n by the Supreme Court', 'Floor Median Distance \n from Court', 'President Distance \n from Court',
            'House Jud. Chair \n Distance from Court','Senate Jud. Chair \n Distance from Court', 'Court-Curbing \n Bills',
            'Solicitor General \n Support', 'Amicus Briefs \n in Favor of the Law', 'Amicus Briefs \n Opposing the Law')
coefData = cbind(varname, coefData)

## estimates of constraint variable is to big 
## so I scaled this variable into 1/100 for presentation purpose.
## I also omitted intercept and four control variables for presentation purpose.
coefData_scaled = coefData
coefData_scaled[3,]=coefData_scaled[3,]*.01
coefData_scaled = cbind(varname, coefData_scaled[,2:5])
coefData_scaled = coefData_scaled[2:9,]

# plotting 
coefplot = ggplot(data=coefData_scaled, aes(x=varname))
coefplot = coefplot + geom_linerange(aes(ymin=lower95, ymax=upper95), size=.5) 
coefplot = coefplot + geom_point(aes(y=coef))
coefplot = coefplot + geom_hline(yintercept=0, linetype=2, color = "red") 
coefplot = coefplot + coord_flip() + xlab('') + ylab('Estimates')
coefplot = coefplot + theme(axis.ticks=element_blank())
coefplot

#### (2) Coefficient plot for vertical cases
coef_ver = data.frame(modeljr3$coefficients)
names(coef_ver)="coef"
sd_ver = data.frame(sqrt(diag(vcovHC(modeljr3, type = "HC"))))
names(sd_ver)="sd"

up95 = data.frame(coef_ver + qnorm(0.975)*sd_ver)
names(up95)="upper95"
lo95 = data.frame(coef_ver - qnorm(0.975)*sd_ver)
names(lo95)="lower95"
up90 = data.frame(coef_ver + qnorm(0.95)*sd_ver)
names(up90)="upper90"
lo90 = data.frame(coef_ver - qnorm(0.95)*sd_ver)
names(lo90)="lower90"
coefData2 = cbind(varname, coef_ver, sd_ver, up95, lo95) 

## Scaling constraint and floor median variables into 1/10 for presentation purpose
## Omitting intercept and four control variables for presentation purpose
coefData_scaled2 = coefData2
coefData_scaled2[3,]=coefData_scaled2[3,]*.1
coefData_scaled2[6,]=coefData_scaled2[6,]*.1
coefData_scaled2 = cbind(varname, coefData_scaled2[,2:5])
coefData_scaled2 = coefData_scaled2[2:9,]

## plotting
coefplot2 = ggplot(data=coefData_scaled2, aes(x=varname)) 
coefplot2=coefplot2 + geom_linerange(aes(ymin=lower95, ymax=upper95), size=0.5) 
coefplot2=coefplot2 + geom_point(aes(y=coef))
coefplot2=coefplot2 + geom_hline(yintercept=0, linetype=2, color = "red") 
coefplot2=coefplot2 + coord_flip() + xlab('') + ylab('')
coefplot2=coefplot2 + theme(axis.ticks=element_blank())
coefplot2

####### Combining two coefficient plots into one

coefplotData = rbind(coefData_scaled, coefData_scaled2)
name=rep(c('Lateral Cases', 'Vertical Cases'), each=8)
coefplotData = cbind(coefplotData, name)

coefplot_c = ggplot(coefplotData, aes(x=varname, color=name))
coefplot_c=coefplot_c + theme_bw() + geom_linerange(aes(ymin=lower95, ymax=upper95), lwd=1, position=position_dodge(width=.5)) 
coefplot_c=coefplot_c + geom_point(aes(y=coef), position=position_dodge(width=.5), shape=21, fill='white')
coefplot_c=coefplot_c + geom_hline(yintercept=0, linetype=2, color = "red") 
coefplot_c=coefplot_c + coord_flip() + xlab('') + ylab('')
coefplot_c = coefplot_c + theme(legend.position='bottom', legend.title=element_blank())
coefplot_c

## maing pdf for this plot
pdf("coefplot.pdf")
coefplot_c
dev.off()


######################## 3. 6-fold cross-validation ##############################

### (1) lateral cases

# partitioning 6 subsamples
halljr_lat$rand = sample(1:6, nrow(halljr_lat), replace=TRUE)
table(halljr_lat$rand)

# making 6 folds data
rmse=function(pred, actual){sqrt(mean(pred-actual)^2)}
coefCrossVal = NULL
perf= NULL

for (ii in 1:6){
  train=halljr_lat[halljr_lat$rand!=ii,]
  test=halljr_lat[halljr_lat$rand==ii,]
  
  # get coefficients
  model=glm(formjr3, data=train, family = "binomial")
  # calculate robust stnadard errors
  rse = sqrt(diag(vcovHC(model, type = "HC1")))
  trainRes=cbind(summary(model)$'coefficients'[,1], rse, ii)
  coefCrossVal=rbind(coefCrossVal, trainRes)
  
  # get performance
  preds=trainRes[,1] %*% t(model.matrix(model) )
  perf=c(perf, rmse(preds, test$strike))
}

# making dataframe and adding confidence interval
crossvalData = data.frame(cbind(rownames(coefCrossVal), coefCrossVal[,1], coefCrossVal[,2], coefCrossVal[,3]))
names(crossvalData)=c('var', 'est', 'stderr', 'rand')
crossvalData[,2] = as.numeric(as.character(crossvalData[,2]))
crossvalData[,3] = as.numeric(as.character(crossvalData[,3]))
crossvalData$hi95 = crossvalData$est + qnorm(0.975)*crossvalData$stderr
crossvalData$lo95 = crossvalData$est - qnorm(0.975)*crossvalData$stderr
crossvalData = crossvalData[-c(1, 10:14, 23:27, 36:40, 49:53, 62:66, 75:78),] # removing intercept and minor control variables for presentation purpose
cvname = c('Strike Mood', 'Institutional Constraint', 'Predicted Support \n by the Pivotal Legislator', 
           'Predicted Support \n by the Supreme Court', 'Floor Median Distance \n from Court', 
           'President Distance \n from Court', 'House Jud. Chair \n Distance from Court', 'Senate Jud. Chair \n Distance from Court')
cvname=rep(cvname, 6)
crossvalData = cbind(crossvalData, cvname)

# plotting
cvplot = ggplot(crossvalData, aes(x=factor(rand), y=est))
cvplot = cvplot + geom_point() + xlab("") + ylab("")
cvplot = cvplot + geom_linerange(aes(ymin=lo95, ymax=hi95), lwd=.5)
cvplot = cvplot + facet_wrap(~cvname, scales='free_y')
cvplot = cvplot + geom_hline(aes(y=0), color='red', linetype=2)
cvplot


## (2) vertical cases

# partitioning 6 subsamples
halljr_ver$rand = sample(1:6, nrow(halljr_ver), replace=TRUE)
table(halljr_ver$rand)

# making 6 folds data
rmse=function(pred, actual){sqrt(mean(pred-actual)^2)}
coefCrossVal2 = NULL
perf2= NULL

for (ii in 1:6){
  train2=halljr_ver[halljr_ver$rand!=ii,]
  test2=halljr_ver[halljr_ver$rand==ii,]
  
  # get coefficients
  model2=glm(formjr3, data=train2, family = "binomial")
  rse2 = sqrt(diag(vcovHC(model2, type = "HC1")))
  trainRes2=cbind(summary(model2)$'coefficients'[,1], rse2, ii)
  coefCrossVal2=rbind(coefCrossVal2, trainRes2)
  
  # get performance
  preds2=trainRes2[,1] %*% t(model.matrix(model2) )
  perf2=c(perf2, rmse(preds2, test2$strike))
}

# making dataframe and adding confidence interval
crossvalData2 = data.frame(cbind(rownames(coefCrossVal2), coefCrossVal2[,1], coefCrossVal2[,2], coefCrossVal2[,3]))
names(crossvalData2)=c('var', 'est', 'stderr', 'rand')
crossvalData2[,2] = as.numeric(as.character(crossvalData2[,2]))
crossvalData2[,3] = as.numeric(as.character(crossvalData2[,3]))
crossvalData2$hi95 = crossvalData2$est + qnorm(0.975)*crossvalData2$stderr
crossvalData2$lo95 = crossvalData2$est - qnorm(0.975)*crossvalData2$stderr
crossvalData2 = crossvalData2[-c(1, 10:14, 23:27, 36:40, 49:53, 62:66, 75:78),] # removing intercept and minor control variables for presentation purpose
crossvalData2 = cbind(crossvalData2, cvname)

# plotting
crossvalData3 = rbind(crossvalData, crossvalData2)
group = rep(c('Lateral Cases', 'Vertical Cases'), each=48)
crossvalData3 = cbind(crossvalData3, group)
cvplot2 = ggplot(crossvalData2, aes(x=factor(rand), y=est))
cvplot2 = cvplot2 + geom_line(crossvalData, aes(x=factor(rand), y=est))
cvplot2 = cvplot2 + geom_point() + xlab("") + ylab("")
cvplot2 = cvplot2 + geom_linerange(aes(ymin=lo95, ymax=hi95), lwd=.5)
cvplot2 = cvplot2 + facet_wrap(~cvname, scales='free_y')
cvplot2 = cvplot2 + geom_hline(aes(y=0), color='red', linetype=2)
cvplot2

### Combining two cross-validation plots into one
cvplot3 = ggplot(crossvalData3, aes(x=factor(rand), y=est, colour=group))
cvplot3 = cvplot3 + theme_bw() + geom_linerange(aes(ymin=lo95, ymax=hi95), lwd=1, position=position_dodge(width=.5) )
cvplot3 = cvplot3 + geom_point(position=position_dodge(width=.5), shape=21, fill='white') + xlab("") + ylab("")
cvplot3 = cvplot3 + facet_wrap(~cvname, scales='free_y')
cvplot3 = cvplot3 + geom_hline(aes(y=0), color='red', linetype=2)
cvplot3 = cvplot3 + theme(legend.position='bottom', legend.title=element_blank())
cvplot3

## making pdf of this plot
pdf("cvplot.pdf", width=7, height=7)
cvplot3
dev.off()


############################## 4. predicted probabilities ################################

###### IV(1): Strike Mood #######

## (1) lateral cases

# I'm interested in the effect of moodstrike and I hold rest of other variables at their mean
moodRange = seq(min(halljr_lat$moodstrike), max(halljr_lat$moodstrike), length.out=100)
scen = with(data=halljr_lat, cbind(1, moodRange, mean(halljr_lat$constraintp), mean(halljr_lat$FloorMedianOverModel),
                                   mean(halljr_lat$JCSPredProb), mean(halljr_lat$ConstraintDist), 
                                   mean(halljr_lat$PresCourtDist), mean(halljr_lat$HJchairDist), 
                                   mean(halljr_lat$SJchairDist), mean(halljr_lat$bills), 
                                   mean(halljr_lat$sgsupp), mean(halljr_lat$amisupp), mean(halljr_lat$amiopp)))

# Calculate predicted log odd values
predVal = scen %*% coef(modeljr4)

# Apply link function to get predicted probabilities
predProbs = 1/(1 + exp(-predVal))
summary(predProbs)

# Plot result
ggData = data.frame(moodRange, predProbs)
names(ggData) = c('strikemood', 'invalidation')
ggplot(data=ggData, aes(x=strikemood, y=invalidation)) + geom_line() + ylim(0,0.6)

# Add confidence intervals by using simulation (to use robust standared error)

sims=1000
draws = mvrnorm(sims, coef(modeljr4), vcovHC(modeljr4, type='HC1') )
predVal_rob = draws %*% t(scen)
predUncert = apply(predVal_rob, 2, function(x) 1/(1+exp(-x)) )

ppData = t(apply(predUncert, 2, function(x){ 
  mean = mean(x)
  qlo95 = quantile(x, 0.025)
  qhi95 = quantile(x, 0.975)
  rbind(mean, qlo95, qhi95) } ))
ppData = data.frame(ppData)
colnames(ppData) = c('Invalidation', 'Lo95', 'Hi95') 
ppData$moodstrike = moodRange
ggplot(ppData, aes(x=moodstrike, y=Invalidation, ymin=Lo95, ymax=Hi95)) + geom_line() + geom_ribbon(alpha=.5)

## (2) vertical cases
moodRange2 = seq(min(halljr_ver$moodstrike), max(halljr_ver$moodstrike), length.out=100)
scen2 = with(data=halljr_ver, cbind(1, moodRange2, mean(halljr_ver$constraintp), mean(halljr_ver$FloorMedianOverModel),
                                    mean(halljr_ver$JCSPredProb), mean(halljr_ver$ConstraintDist), 
                                    mean(halljr_ver$PresCourtDist), mean(halljr_ver$HJchairDist), 
                                    mean(halljr_ver$SJchairDist), mean(halljr_ver$bills), 
                                    mean(halljr_ver$sgsupp), mean(halljr_ver$amisupp), mean(halljr_ver$amiopp)))

# Calculate predicted log odd values
predVal2 = scen2 %*% coef(modeljr3)

# Apply link function to get predicted probabilities
predProbs2 = 1/(1 + exp(-predVal2))
summary(predProbs2)

# Plot result
ggData2 = data.frame(moodRange2, predProbs2)
names(ggData2) = c('strikemood', 'invalidation')
ggplot(data=ggData2, aes(x=strikemood, y=invalidation)) + geom_line() + ylim(0,0.6)

# Add confidence intervals by using simulation
sims=1000
draws2 = mvrnorm(sims, coef(modeljr3), vcovHC(modeljr3, type='HC1') )
predVal_rob2 = draws2 %*% t(scen2)
predUncert2 = apply(predVal_rob2, 2, function(x) 1/(1+exp(-x)) )

ppData2 = t(apply(predUncert2, 2, function(x){ 
  mean = mean(x)
  qlo95 = quantile(x, 0.025)
  qhi95 = quantile(x, 0.975)
  rbind(mean, qlo95, qhi95) } ))
ppData2 = data.frame(ppData2)
colnames(ppData2) = c('Invalidation', 'Lo95', 'Hi95') 
ppData2$moodstrike = moodRange2
ggplot(ppData2, aes(x=moodstrike, y=Invalidation, ymin=Lo95, ymax=Hi95)) + geom_line() + geom_ribbon(alpha=.5)

### Combining two plots into one
ppData_c = rbind(ppData, ppData2)
ppname = rep(c('Lateral Cases', 'Vertical Cases'), each=100)
ppData_c = cbind(ppData_c, ppname)

ppplot =ggplot()
ppplot = ppplot + theme_bw() + geom_line(data=ppData_c, aes(x=moodstrike, y=Invalidation, color=factor(ppname)))
ppplot = ppplot + geom_ribbon(data=ppData_c, alpha=0.3, aes(x=moodstrike, y=Invalidation,
                                                            ymin=Lo95, ymax=Hi95, fill=factor(ppname))) + ylim(0,1) +
  ylab("Probability of Invalidation \n") + ggtitle("(1) Strike Mood and Invalidation \n")
ppplot = ppplot + theme(legend.position='bottom', legend.title=element_blank())
ppplot = ppplot + scale_x_continuous(name='\n Strike Mood',limits=c(-1.8,1.82), breaks=seq(-1.8,1.82, 0.5), expand=c(0,0))
ppplot

## Making pdf of this plot
pdf("predprobmood.pdf")
ppplot
dev.off()

###### IV(2): Institutional Constraint #######

## (1) lateral cases

# I'm interested in the effect of constraint and hold other variables at their mean
cRange = seq(min(halljr_lat$constraintp), max(halljr_lat$constraintp), length.out=100)
scen_c1 = with(data=halljr_lat, cbind(1, mean(moodstrike), cRange, mean(halljr_lat$FloorMedianOverModel),
                                      mean(halljr_lat$JCSPredProb), mean(halljr_lat$ConstraintDist), 
                                      mean(halljr_lat$PresCourtDist), mean(halljr_lat$HJchairDist), 
                                      mean(halljr_lat$SJchairDist), mean(halljr_lat$bills), 
                                      mean(halljr_lat$sgsupp), mean(halljr_lat$amisupp), mean(halljr_lat$amiopp)))

# Calculate predicted log odd values
predVal_c1 = scen_c1 %*% coef(modeljr4)

# Apply link function to get predicted probabilities
predProbs_c1 = 1/(1 + exp(-predVal_c1))
summary(predProbs_c1)

# Plot result
cData = data.frame(cRange, predProbs_c1)
names(cData) = c('constraint', 'invalidation')
ggplot(data=cData, aes(x=constraint, y=invalidation)) + geom_line() + ylim(0,0.3)

# Add confidence intervals by using simulation
sims=1000
draws = mvrnorm(sims, coef(modeljr4), vcovHC(modeljr4, type='HC1') )
predVal_rob_c1 = draws %*% t(scen_c1)
predUncert_c1 = apply(predVal_rob_c1, 2, function(x) 1/(1+exp(-x)) )

qqData = t(apply(predUncert_c1, 2, function(x){ 
  mean = mean(x)
  qlo95 = quantile(x, 0.025)
  qhi95 = quantile(x, 0.975)
  rbind(mean, qlo95, qhi95) } ))
qqData = data.frame(qqData)
colnames(qqData) = c('Invalidation', 'Lo95', 'Hi95') 
qqData$Constraint = cRange
ggplot(qqData, aes(x=Constraint, y=Invalidation, ymin=Lo95, ymax=Hi95)) + geom_line() + geom_ribbon(alpha=.5)

## (2) plotting vertical cases

cRange2 = seq(min(halljr_ver$constraintp), max(halljr_ver$constraintp), length.out=100)
scen_c2 = with(data=halljr_ver, cbind(1, mean(moodstrike), cRange2, mean(halljr_ver$FloorMedianOverModel),
                                      mean(halljr_ver$JCSPredProb), mean(halljr_ver$ConstraintDist), 
                                      mean(halljr_ver$PresCourtDist), mean(halljr_ver$HJchairDist), 
                                      mean(halljr_ver$SJchairDist), mean(halljr_ver$bills), 
                                      mean(halljr_lat$sgsupp), mean(halljr_ver$amisupp), mean(halljr_ver$amiopp)))

# Calculate predicted log odd values
predVal_c2 = scen_c2 %*% coef(modeljr3)

# Apply link function to get predicted probabilities
predProbs_c2 = 1/(1 + exp(-predVal_c2))
summary(predProbs_c2)

# Plot result
cData2 = data.frame(cRange2, predProbs_c2)
names(cData2) = c('constraint', 'invalidation')
ggplot(data=cData2, aes(x=constraint, y=invalidation)) + geom_line() + ylim(0,0.3)

# Add confidence intervals by using simulation
sims=1000
draws2 = mvrnorm(sims, coef(modeljr3), vcovHC(modeljr3, type='HC1') )
predVal_rob_c2 = draws2 %*% t(scen_c2)
predUncert_c2 = apply(predVal_rob_c2, 2, function(x) 1/(1+exp(-x)) )

qqData2 = t(apply(predUncert_c2, 2, function(x){ 
  mean = mean(x)
  qlo95 = quantile(x, 0.025)
  qhi95 = quantile(x, 0.975)
  rbind(mean, qlo95, qhi95) } ))
qqData2 = data.frame(qqData2)
colnames(qqData2) = c('Invalidation', 'Lo95', 'Hi95') 
qqData2$Constraint = cRange2
ggplot(qqData2, aes(x=Constraint, y=Invalidation, ymin=Lo95, ymax=Hi95)) + geom_line() + geom_ribbon(alpha=.5)

### combining two plots into one!
qqData_c = rbind(qqData, qqData2)
ppname = rep(c('Lateral Cases', 'Vertical Cases'), each=100)
qqData_c = cbind(qqData_c, ppname)

qqplot =ggplot()
qqplot = qqplot + theme_bw() + geom_line(data=qqData_c, aes(x=Constraint, y=Invalidation, color=factor(ppname)))
qqplot = qqplot + geom_ribbon(data=qqData_c, alpha=0.3, aes(x=Constraint, y=Invalidation,
                                                            ymin=Lo95, ymax=Hi95, fill=factor(ppname))) + ylim(0,0.5) +
  ylab("Probability of Invalidation \n") + ggtitle("(2) Institutional Constraint and Invalidation \n")
qqplot = qqplot + theme(legend.position='bottom', legend.title=element_blank())
qqplot = qqplot + scale_x_continuous(name='\n Institutional Constraint', limits=c(0,0.1), breaks=seq(0,0.1,0.02), expand=c(0,0))
qqplot

## making pdf of this plot

pdf("predprobconstraint.pdf")
qqplot
dev.off()


#### IV(3): Predicted Support by the Pivotal Legislator: FloorMeidanOverModel

## (1) lateral cases

# let's say we are interested in the effect of predicted support by the pivotal legislator
# we hold rest of other variables at their mean
medRange = seq(min(halljr_lat$FloorMedianOverModel), max(halljr_lat$FloorMedianOverModel), length.out=100)
medscen = with(data=halljr_lat, cbind(1, mean(halljr_lat$moodstrike), mean(halljr_lat$constraintp), medRange,
                                      mean(halljr_lat$JCSPredProb), mean(halljr_lat$ConstraintDist), 
                                      mean(halljr_lat$PresCourtDist), mean(halljr_lat$HJchairDist), 
                                      mean(halljr_lat$SJchairDist), mean(halljr_lat$bills), 
                                      mean(halljr_lat$sgsupp), mean(halljr_lat$amisupp), mean(halljr_lat$amiopp)))

# Calculate predicted log odd values
medPredVal = medscen %*% coef(modeljr4)

# Apply link function to get predicted probabilities
medpredProbs = 1/(1 + exp(-medPredVal))
summary(medpredProbs)

# Plot result
medData = data.frame(medRange, medpredProbs)
names(medData) = c('median', 'invalidation')
ggplot(data=medData, aes(x=median, y=invalidation)) + geom_line() + ylim(0,0.1)

# Add confidence intervals by using simulation

sims=1000
draws = mvrnorm(sims, coef(modeljr4), vcovHC(modeljr4, type='HC1') )
medPredVal_rob = draws %*% t(medscen)
medpredUncert = apply(medPredVal_rob, 2, function(x) 1/(1+exp(-x)) )

medData_plot = t(apply(medpredUncert, 2, function(x){ 
  mean = mean(x)
  qlo95 = quantile(x, 0.025)
  qhi95 = quantile(x, 0.975)
  rbind(mean, qlo95, qhi95) } ))
medData_plot = data.frame(medData_plot)
colnames(medData_plot) = c('Invalidation', 'Lo95', 'Hi95') 
medData_plot$median = medRange
ggplot(medData_plot, aes(x=median, y=Invalidation, ymin=Lo95, ymax=Hi95)) + geom_line() + geom_ribbon(alpha=.5)

## (2) plotting for lateral cases

medRange2 = seq(min(halljr_ver$FloorMedianOverModel), max(halljr_ver$FloorMedianOverModel), length.out=100)
medscen2 = with(data=halljr_ver, cbind(1, mean(halljr_ver$moodstrike), mean(halljr_ver$constraintp), medRange,
                                       mean(halljr_ver$JCSPredProb), mean(halljr_ver$ConstraintDist), 
                                       mean(halljr_ver$PresCourtDist), mean(halljr_ver$HJchairDist), 
                                       mean(halljr_ver$SJchairDist), mean(halljr_ver$bills), 
                                       mean(halljr_ver$sgsupp), mean(halljr_ver$amisupp), mean(halljr_ver$amiopp)))

# Calculate predicted log odd values
medPredVal2 = medscen2 %*% coef(modeljr3)

# Apply link function to get predicted probabilities
medpredProbs2 = 1/(1 + exp(-medPredVal2))
summary(medpredProbs2)

# Plot result
medData2 = data.frame(medRange2, medpredProbs2)
names(medData2) = c('median', 'invalidation')
ggplot(data=medData2, aes(x=median, y=invalidation)) + geom_line()

# Add confidence intervals by using simulation

sims=1000
draws2 = mvrnorm(sims, coef(modeljr3), vcovHC(modeljr3, type='HC1') )
medPredVal_rob2 = draws2 %*% t(medscen2)
medpredUncert2 = apply(medPredVal_rob2, 2, function(x) 1/(1+exp(-x)) )

medData_plot2 = t(apply(medpredUncert2, 2, function(x){ 
  mean = mean(x)
  qlo95 = quantile(x, 0.025)
  qhi95 = quantile(x, 0.975)
  rbind(mean, qlo95, qhi95) } ))
medData_plot2 = data.frame(medData_plot2)
colnames(medData_plot2) = c('Invalidation', 'Lo95', 'Hi95') 
medData_plot2$median = medRange2
ggplot(medData_plot2, aes(x=median, y=Invalidation, ymin=Lo95, ymax=Hi95)) + geom_line() + geom_ribbon(alpha=.5)

## combining two plots into one!! (it looks good!)
medData_c = rbind(medData_plot, medData_plot2)
ppname = rep(c('Lateral Cases', 'Vertical Cases'), each=100)
medData_c = cbind(medData_c, ppname)

medplot =ggplot()
medplot = medplot + theme_bw() + geom_line(data=medData_c, aes(x=median, y=Invalidation, color=factor(ppname)))
medplot = medplot + geom_ribbon(data=medData_c, alpha=0.3, aes(x=median, y=Invalidation,
                                                               ymin=Lo95, ymax=Hi95, fill=factor(ppname))) + ylim(0,1) +
  ylab("Probability of Invalidation \n") + ggtitle("(1) Predicted Support by the Pivotal Legislator and Invalidation \n")
medplot = medplot + theme(legend.position='bottom', legend.title=element_blank())
medplot = medplot + scale_x_continuous(name='\n Predicted Support by the Pivotal Legislator',limits=c(0.05,1), breaks=seq(0, 1, 0.2), expand=c(0,0))
medplot

## making pdf of this plot
pdf("medplot.pdf")
medplot
dev.off()


####### IV(4): Predicted Support by the Court: JCSPredProb

## (1) lateral cases

# I'm interested in the effect of predicted support by the court
# and I hold rest of other variables at their mean
jcsRange = seq(min(halljr_lat$JCSPredProb), max(halljr_lat$JCSPredProb), length.out=100)
scen_jcs1 = with(data=halljr_lat, cbind(1, mean(halljr_lat$moodstrike), mean(halljr_lat$constraintp), mean(halljr_lat$FloorMedianOverModel),
                                        jcsRange, mean(halljr_lat$ConstraintDist), 
                                        mean(halljr_lat$PresCourtDist), mean(halljr_lat$HJchairDist), 
                                        mean(halljr_lat$SJchairDist), mean(halljr_lat$bills), 
                                        mean(halljr_lat$sgsupp), mean(halljr_lat$amisupp), mean(halljr_lat$amiopp)))

# Calculate predicted log odd values
predVal_jcs1 = scen_jcs1 %*% coef(modeljr4)

# Apply link function to get predicted probabilities
predProbs_jcs1 = 1/(1 + exp(-predVal_jcs1))
summary(predProbs_jcs1)

# Plot result
ggData_jcs1 = data.frame(jcsRange, predProbs_jcs1)
names(ggData_jcs1) = c('jcs', 'invalidation')
ggplot(data=ggData_jcs1, aes(x=jcs, y=invalidation)) + geom_line() + ylim(0,0.1)

# Add confidence intervals by using simulation
sims=1000
draws = mvrnorm(sims, coef(modeljr4), vcovHC(modeljr4, type='HC1') )
predVal_rob_jcs1 = draws %*% t(scen_jcs1)
predUncert_jcs1 = apply(predVal_rob_jcs1, 2, function(x) 1/(1+exp(-x)) )

ppData_jsc1 = t(apply(predUncert_jcs1, 2, function(x){ 
  mean = mean(x)
  qlo95 = quantile(x, 0.025)
  qhi95 = quantile(x, 0.975)
  rbind(mean, qlo95, qhi95) } ))
ppData_jsc1 = data.frame(ppData_jsc1)
colnames(ppData_jsc1) = c('Invalidation', 'Lo95', 'Hi95') 
ppData_jsc1$JCSPredProb = jcsRange
ggplot(ppData_jsc1, aes(x=JCSPredProb, y=Invalidation, ymin=Lo95, ymax=Hi95)) + geom_line() + geom_ribbon(alpha=.5)

## (2) vertical cases

jcsRange_ver = seq(min(halljr_ver$JCSPredProb), max(halljr_ver$JCSPredProb), length.out=100)
scen_jcs2 = with(data=halljr_ver, cbind(1, mean(halljr_ver$moodstrike), mean(halljr_ver$constraintp), mean(halljr_ver$FloorMedianOverModel),
                                        jcsRange_ver, mean(halljr_ver$ConstraintDist), 
                                        mean(halljr_ver$PresCourtDist), mean(halljr_ver$HJchairDist), 
                                        mean(halljr_ver$SJchairDist), mean(halljr_ver$bills), 
                                        mean(halljr_ver$sgsupp), mean(halljr_ver$amisupp), mean(halljr_ver$amiopp)))

# Calculate predicted log odd values
predVal_jcs2 = scen_jcs2 %*% coef(modeljr3)

# Apply link function to get predicted probabilities
predProbs_jcs2 = 1/(1 + exp(-predVal_jcs2))
summary(predProbs_jcs2)

# Plot result
ggData_jcs2 = data.frame(jcsRange_ver, predProbs_jcs2)
names(ggData_jcs2) = c('jcs', 'invalidation')
ggplot(data=ggData_jcs2, aes(x=jcs, y=invalidation)) + geom_line()

# Add confidence intervals by using simulation

sims=1000
draws2 = mvrnorm(sims, coef(modeljr3), vcovHC(modeljr3, type='HC1') )
predVal_rob_jcs2 = draws2 %*% t(scen_jcs2)
predUncert_jcs2 = apply(predVal_rob_jcs2, 2, function(x) 1/(1+exp(-x)) )

ppData_jsc2 = t(apply(predUncert_jcs2, 2, function(x){ 
  mean = mean(x)
  qlo95 = quantile(x, 0.025)
  qhi95 = quantile(x, 0.975)
  rbind(mean, qlo95, qhi95) } ))
ppData_jsc2 = data.frame(ppData_jsc2)
colnames(ppData_jsc2) = c('Invalidation', 'Lo95', 'Hi95') 
ppData_jsc2$JCSPredProb = jcsRange_ver
ggplot(ppData_jsc2, aes(x=JCSPredProb, y=Invalidation, ymin=Lo95, ymax=Hi95)) + geom_line() + geom_ribbon(alpha=.5)

## combining two plots into one
ppData_jcs_c = rbind(ppData_jsc1, ppData_jsc2)
ppname = rep(c('Lateral Cases', 'Vertical Cases'), each=100)
ppData_jcs_c = cbind(ppData_jcs_c, ppname)

ppplot_jcs =ggplot()
ppplot_jcs = ppplot_jcs + theme_bw() + geom_line(data=ppData_jcs_c, aes(x=JCSPredProb, y=Invalidation, color=factor(ppname)))
ppplot_jcs = ppplot_jcs + geom_ribbon(data=ppData_jcs_c, alpha=0.3, aes(x=JCSPredProb, y=Invalidation,
                                                                        ymin=Lo95, ymax=Hi95, fill=factor(ppname))) + ylim(0,1) +
  ylab("Probability of Invalidation \n") + ggtitle("(2) Predicted Support by the Supreme Court and Invalidation \n")
ppplot_jcs = ppplot_jcs + theme(legend.position='bottom', legend.title=element_blank())
ppplot_jcs = ppplot_jcs + scale_x_continuous(name='\n Predicted Support by the Supreme Court',limits=c(0.2,1), breaks=seq(0.2, 1, 0.2), expand=c(0,0))
ppplot_jcs

# making pdf of this plot
pdf("jcsplot.pdf")
ppplot_jcs
dev.off()

############################## 5. ROC Curve and Separation Plots ################################

#### (1) Lateral Cases

# So first lets calculate pi:
beta = coef(modeljr4)
X = data.matrix(cbind(1, halljr_lat$moodstrike, halljr_lat$constraintp, halljr_lat$FloorMedianOverModel,
                      halljr_lat$JCSPredProb, halljr_lat$ConstraintDist, 
                      halljr_lat$PresCourtDist, halljr_lat$HJchairDist, 
                      halljr_lat$SJchairDist, halljr_lat$bills, 
                      halljr_lat$sgsupp, halljr_lat$amisupp, halljr_lat$amiopp))
predProbs_roc = 1/(1+exp(-X %*% beta))
round(predProbs_roc, 3)

# To generate a ROC plot

roc = function(prediction, actual){
  library(ROCR)
  pred = prediction(prediction, actual)
  perf = performance(pred,"tpr","fpr")
  rocData = data.frame(attributes(perf)$x.values[[1]], attributes(perf)$y.values[[1]])
  names(rocData) = c('FPR', 'TPR')
  return(rocData)
}

# Plot roc plot
rocCurve_lat = roc(predProbs_roc, halljr_lat$strike)
ggplot(rocCurve_lat, aes(x=FPR, y=TPR)) + geom_line() + geom_abline(intercept=0, slope=1) 

# Approximating area under the curve
getAUC = function(prediction, actual){
  library(ROCR)
  pred = prediction(prediction, actual) 
  attributes(performance(pred,"auc"))$y.values[[1]]
}
AUC_lat = getAUC(predProbs_roc, halljr_lat$strike)

# separation plot for lateral cases
pdf("sepplot1.pdf", width=7, height=2)
separationplot(actual=as.vector(halljr_lat$strike), pred=as.vector(predProbs_roc), newplot=FALSE)
dev.off()

#### (2) Vertical Cases

# So first lets calculate pi:
beta_ver = coef(modeljr3)
X_ver = data.matrix(cbind(1, halljr_ver$moodstrike, halljr_ver$constraintp, halljr_ver$FloorMedianOverModel,
                          halljr_ver$JCSPredProb, halljr_ver$ConstraintDist, 
                          halljr_ver$PresCourtDist, halljr_ver$HJchairDist, 
                          halljr_ver$SJchairDist, halljr_ver$bills, 
                          halljr_ver$sgsupp, halljr_ver$amisupp, halljr_ver$amiopp))
predProbs_roc2 = 1/(1+exp(-X_ver %*% beta_ver))
round(predProbs_roc2, 3)

# Plot roc plot
rocCurve_ver = roc(predProbs_roc2, halljr_ver$strike)
ggplot(rocCurve_ver, aes(x=FPR, y=TPR)) + geom_line() + geom_abline(intercept=0, slope=1) 

# Approximating area under the curve
AUC_ver = getAUC(predProbs_roc2, halljr_ver$strike)


## combining two ROC plots into one
rocData = rbind(rocCurve_lat, rocCurve_ver)
n = data.frame(rep('Lateral Cases', 83)); names(n)='case'
nn = data.frame(rep('Vertical Cases', 91)); names(nn)='case'
rocname = rbind(n, nn)
rocData = cbind(rocData, rocname)

roccurve = ggplot(rocData, aes(x=FPR, y=TPR, color=case)) + geom_line(lwy=2) + geom_abline(intercept=0, slope=1, alpha=0.5)
roccurve = roccurve + theme_bw() + theme(legend.title=element_blank(), legend.justification = c(1.2, -0.2), legend.position = c(1, 0))
roccurve = roccurve + scale_x_continuous(name='\n False Positive Rate',limits=c(-0.05,1.05), breaks=seq(0, 1, 0.2), expand=c(0,0))
roccurve = roccurve + scale_y_continuous(name='True Positive Rate \n',limits=c(-0.05,1.05), breaks=seq(0, 1, 0.2), expand=c(0,0))
roccurve = roccurve + scale_color_discrete(labels=c('Lateral Cases (AUC = 0.926)', 'Vertical Cases (AUC = 0.902)'))
roccurve

## making pdf of this plot
pdf("roccurve.pdf")
roccurve
dev.off()

## separation plot for vertical cases

pdf("sepplot2.pdf", width=7, height=2)
separationplot(actual=as.vector(halljr_ver$strike), pred=as.vector(predProbs_roc2), newplot=FALSE)
dev.off()


########################## 6. Extension: Adding "Salience" Variable ############################

# Running new model with including salience
form_salience = formula(strike ~ moodstrike + constraintp + nyt_salient + FloorMedianOverModel + JCSPredProb + ConstraintDist + 
                          PresCourtDist + HJchairDist + SJchairDist + bills + sgsupp + amisupp + amiopp)
# lateral cases
lat_salience = glm(form_salience, data=halljr_lat, family="binomial")
summary(lat_salience)
coeftest(lat_salience, vcov=vcovHC(lat_salience, type='HC')) # to get robust standard error

# vertical cases
ver_salience = glm(form_salience, data=halljr_ver, family="binomial")
summary(ver_salience)
coeftest(ver_salience, vcov=vcovHC(ver_salience, type='HC'))

########## Coefficient plots

## (1) Lateral Cases

clat = data.frame(lat_salience$coefficients)
names(clat)="coef"
cov.modeljr <- vcovHC(lat_salience, type = "HC")
rob.std.err <- sqrt(diag(cov.modeljr))
slat = data.frame(rob.std.err)
names(slat)="sd"

upper95 = data.frame(clat + qnorm(0.975)*slat)
names(upper95)="upper95"
lower95 = data.frame(clat - qnorm(0.975)*slat)
names(lower95)="lower95"
coefData_s = cbind(clat, slat, upper95, lower95) 
varname = c('(Intercept)', 'Strike Mood', 'Institutional \n Constraint', 'Salience', 'Predicted Support \n by the Pivotal Legislator',
            'Predicted Support \n by the Supreme Court', 'Floor Median Distance \n from Court', 'President Distance \n from Court',
            'House Jud. Chair \n Distance from Court','Senate Jud. Chair \n Distance from Court', 'Court-Curbing \n Bills',
            'Solicitor General \n Support', 'Amicus Briefs \n in Favor of the Law', 'Amicus Briefs \n Opposing the Law')
coefData_s = cbind(varname, coefData_s)

## For presentation purpose: scaling constraint variable because it is too big: 1/100
## Omitting intercept and four control variables
coefData_s_scaled = coefData_s
coefData_s_scaled[3,]=coefData_s_scaled[3,]*.01
coefData_s_scaled = cbind(varname, coefData_s_scaled[,2:5])
coefData_s_scaled = coefData_s_scaled[2:10,]
colnames(coefData_s_scaled)=c('varname', 'coef', 'sd', 'upper95', 'lower95')

## plotting
coefplot_s1 = ggplot(data=coefData_s_scaled, aes(x=varname))
coefplot_s1 = coefplot_s1 + geom_linerange(aes(ymin=lower95, ymax=upper95), size=.5) 
coefplot_s1 = coefplot_s1 + geom_point(aes(y=coef))
coefplot_s1 = coefplot_s1 + geom_hline(yintercept=0, linetype=2, color = "red") 
coefplot_s1 = coefplot_s1 + coord_flip() + xlab('') + ylab('Estimates')
coefplot_s1 = coefplot_s1 + theme(axis.ticks=element_blank())
coefplot_s1

### (2) Vertical Cases

cver = data.frame(ver_salience$coefficients)
names(cver)="coef"
vcovver <- vcovHC(ver_salience, type = "HC")
rsever <- sqrt(diag(vcovver))
sver = data.frame(rsever)
names(sver)="sd"

upp95 = data.frame(cver + qnorm(0.975)*sver)
names(upp95)="upper95"
low95 = data.frame(cver - qnorm(0.975)*sver)
names(low95)="lower95"
coefData_sv = cbind(cver, sver, upp95, low95) 
varname = c('(Intercept)', 'Strike Mood', 'Institutional \n Constraint', 'Salience', 'Predicted Support \n by the Pivotal Legislator',
            'Predicted Support \n by the Supreme Court', 'Floor Median Distance \n from Court', 'President Distance \n from Court',
            'House Jud. Chair \n Distance from Court','Senate Jud. Chair \n Distance from Court', 'Court-Curbing \n Bills',
            'Solicitor General \n Support', 'Amicus Briefs \n in Favor of the Law', 'Amicus Briefs \n Opposing the Law')
coefData_sv = cbind(varname, coefData_sv)

## For presentation purpose: scaling constraint variable and floor median variable: 1/10
## Omitting intercept and four control variables
coefData_sv_scaled = coefData_sv
coefData_sv_scaled[3,]=coefData_sv_scaled[3,]*.1
coefData_sv_scaled[7,]=coefData_sv_scaled[7,]*.1
coefData_sv_scaled = cbind(varname, coefData_sv_scaled[,2:5])
coefData_sv_scaled = coefData_sv_scaled[2:10,]

## plotting
coefplot_s2 = ggplot(data=coefData_sv_scaled, aes(x=varname)) ## scaling constraint variable (strongly recommended!!)
coefplot_s2 = coefplot_s2 + geom_linerange(aes(ymin=lower95, ymax=upper95), size=.5) 
coefplot_s2 = coefplot_s2 + geom_point(aes(y=coef))
coefplot_s2 = coefplot_s2 + geom_hline(yintercept=0, linetype=2, color = "red") 
coefplot_s2 = coefplot_s2 + coord_flip() + xlab('') + ylab('Estimates')
coefplot_s2 = coefplot_s2 + theme(axis.ticks=element_blank())
coefplot_s2

## combining two plots into one
salplotData = rbind(coefData_s_scaled, coefData_sv_scaled)
name=rep(c('Lateral Cases', 'Vertical Cases'), each=9)
salplotData = cbind(salplotData, name)

salplot = ggplot(salplotData, aes(x=varname, color=name))
salplot = salplot + theme_bw() + geom_linerange(aes(ymin=lower95, ymax=upper95), lwd=1, position=position_dodge(width=.5)) 
salplot=salplot + geom_point(aes(y=coef), position=position_dodge(width=.5), shape=21, fill='white')
salplot=salplot + geom_hline(yintercept=0, linetype=2, color = "red") 
salplot=salplot + coord_flip() + xlab('') + ylab('')
salplot = salplot + theme(legend.position='bottom', legend.title=element_blank())
salplot

## making pdf of this plot
pdf("salplot.pdf", width=8, height=6)
salplot
dev.off()


########################## 7. Problem inherent in Hall's model ############################

#### Histogram

hist = qplot(halljr_lat$constraintp, data=halljr_lat, geom="histogram", binwidth=0.01)
hist = hist + theme_bw() + xlab("\n Institutional Constraint") + ggtitle("Histogram of Institional Constraint \n")
hist = hist + scale_y_continuous(name='Frequency \n',limits=c(0,65), breaks=seq(0, 65, 10), expand=c(0,0))
hist

pdf("histogram.pdf")
hist
dev.off()

#### Standardized Residual vs. Institutional Constraints Plot
rstandard = data.frame(rstandard(modeljr4))
residualData = cbind(halljr_lat$constraintp, rstandard)
names(residualData) = c('Constraint', 'SR')

residualplot = ggplot(data=residualData, aes(x=Constraint, y=SR))
residualplot = residualplot + theme_bw() + geom_point(aes(x=Constraint, y=SR), size=3) + 
  xlab("\n Institutional Constraint") + ylab("Standardized Residual \n") + 
  ggtitle("Standardized Residual vs. Institutional Constraint \n")
residualplot = residualplot + geom_hline(yintercept=0, linetype=2, color = "red") 
residualplot

pdf("residualplot.pdf")
residualplot
dev.off()